In [ ]:
!pip install open3d
In [2]:
import cv2
import numpy as np
import open3d as o3d
import matplotlib.pyplot as plt
from torchvision import models
import torch

Clone the SoccerNet Camera Calibration repository¶

In [3]:
!git clone https://github.com/SoccerNet/sn-calibration
Cloning into 'sn-calibration'...
remote: Enumerating objects: 232, done.
remote: Counting objects: 100% (82/82), done.
remote: Compressing objects: 100% (34/34), done.
remote: Total 232 (delta 60), reused 53 (delta 48), pack-reused 150 (from 1)
Receiving objects: 100% (232/232), 2.68 MiB | 34.30 MiB/s, done.
Resolving deltas: 100% (131/131), done.
In [4]:
%cd sn-calibration
/content/sn-calibration
In [5]:
%cd src/
/content/sn-calibration/src
In [6]:
%ls
baseline_cameras.py  detect_extremities.py  evaluate_extremities.py
camera.py            evalai_camera.py       soccerpitch.py
dataloader.py        evaluate_camera.py

NOTE: Download the weights from https://github.com/SoccerNet/sn-calibration (search the README for "drive" to find the download link) and move them inside the resources folder.

Go into src/detect_extremities.py (double-click to open the editor on Colab), change "from src.soccerpitch import SoccerPitch" to "from soccerpitch import SoccerPitch" (remove the "src." prefix), and save.

In [7]:
from detect_extremities import SegmentationNetwork
In [8]:
# Load the pretrained pitch-line segmentation network together with its
# normalization statistics (presumably the dataset mean/std applied to the
# input image — confirm in detect_extremities.py).
seg_net = SegmentationNetwork(
    "../resources/soccer_pitch_segmentation.pth",  # model weights
    "../resources/mean.npy",  # per-channel mean (assumed; see note above)
    "../resources/std.npy"  # per-channel std (assumed; see note above)
)
/usr/local/lib/python3.11/dist-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=None`.
  warnings.warn(msg)
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 204MB/s]
In [9]:
from soccerpitch import SoccerPitch

# Flat color palette for indexed images: index 0 is black (background),
# followed by the RGB triplet of each pitch line class, in class order.
lines_palette = [0, 0, 0] + [
    channel
    for line_class in SoccerPitch.lines_classes
    for channel in SoccerPitch.palette[line_class]
]

NOTE: Move img1 and img2 to sn-calibration/images.

Image 1¶

In [244]:
from PIL import Image
import cv2

# Load the first view and downscale it to the 960x540 working resolution
# used throughout the notebook.
image = cv2.imread("../images/img1.PNG")
image = cv2.resize(image, (960, 540))
print(image.shape)
# OpenCV loads BGR; convert to RGB so PIL displays the colors correctly.
rgbimage = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(rgbimage)
pil_image
(540, 960, 3)
Out[244]:
No description has been provided for this image

Regulatory pitch¶

In [245]:
from camera import Camera

# Canvas at the 960x540 working resolution used throughout the notebook
# (the original cell used 548 rows, which appears to be a typo).
black_img = np.zeros((540, 960, 3), dtype=np.uint8)

# Synthetic pinhole camera used only to visualize the regulatory pitch
# model: equal focal lengths, positioned at z = -60 (presumably 60 m above
# the pitch plane — confirm against the Camera class axis convention).
camera = Camera()
camera.xfocal_length = 400
camera.yfocal_length = 400
camera.position = [0, 0, -60]
camera.draw_colorful_pitch(black_img, SoccerPitch.palette)

# Convert BGR drawing to RGB for correct PIL display.
rgb_model = cv2.cvtColor(black_img, cv2.COLOR_BGR2RGB)
pitch_model = Image.fromarray(rgb_model)
pitch_model
Out[245]:
No description has been provided for this image
In [246]:
import cv2
import numpy as np
from PIL import Image

# Step 1: Get raw segmentation output (one class index per pixel, 360x640)
seg_mask = seg_net.analyse_image(image)
pitch = SoccerPitch()

# Step 2: Create empty mask for cleaned results
cleaned_mask = np.zeros_like(seg_mask, dtype=np.uint8)
line_data = []

# Step 3: Extract one straight segment per straight pitch-line class
for class_idx, class_name in enumerate(SoccerPitch.lines_classes):
    # Posts, crossbars and circles are not straight ground-plane lines,
    # so fitting a single straight line to them makes no sense.
    if "post" in class_name or "crossbar" in class_name or "Circle" in class_name:
        continue

    class_mask = (seg_mask == class_idx + 1).astype(np.uint8)
    color = SoccerPitch.palette[class_name]

    # Morphological opening removes small segmentation speckle.
    kernel = np.ones((5, 5), np.uint8)
    class_mask = cv2.morphologyEx(class_mask, cv2.MORPH_OPEN, kernel)

    contours, _ = cv2.findContours(class_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        # cv2.fitLine returns a (4, 1) array; flatten it to plain scalars so
        # the int(...) conversions below no longer rely on NumPy's deprecated
        # ndarray-to-scalar conversion (a DeprecationWarning since NumPy 1.25,
        # scheduled to become an error).
        vx, vy, px, py = cv2.fitLine(largest_contour, cv2.DIST_L2, 0, 0.01, 0.01).ravel()
        points = largest_contour[:, 0, :]

        # Parameterize along the dominant direction so we never divide by a
        # near-zero component of the unit direction vector.
        if abs(vx) > abs(vy):
            min_x, max_x = np.min(points[:, 0]), np.max(points[:, 0])
            y1 = int(((min_x - px) * vy / vx) + py)
            y2 = int(((max_x - px) * vy / vx) + py)
            pt1, pt2 = (min_x, y1), (max_x, y2)
        else:
            min_y, max_y = np.min(points[:, 1]), np.max(points[:, 1])
            x1 = int(((min_y - py) * vx / vy) + px)
            x2 = int(((max_y - py) * vx / vy) + px)
            pt1, pt2 = (x1, min_y), (x2, max_y)

        line_data.append({
            'class_name': class_name,
            'class_idx': class_idx + 1,
            'pt1': pt1,
            'pt2': pt2,
        })

# Step 4: Merge endpoints that are close to each other (likely shared corners)
threshold = 20
def close(p1, p2, t=threshold):
    """Return True when two 2D points are closer than t pixels."""
    return np.linalg.norm(np.array(p1) - np.array(p2)) < t

for i in range(len(line_data)):
    for j in range(i + 1, len(line_data)):
        for key_i, key_j in [('pt1', 'pt1'), ('pt1', 'pt2'), ('pt2', 'pt1'), ('pt2', 'pt2')]:
            p1 = line_data[i][key_i]
            p2 = line_data[j][key_j]
            if close(p1, p2):
                # Snap both endpoints to their midpoint.
                avg = tuple(np.mean([p1, p2], axis=0).astype(int))
                line_data[i][key_i] = avg
                line_data[j][key_j] = avg

# Step 5: Draw lines and store homogeneous line equations
extremities_image = image.copy()
height, width = image.shape[:2]

line_matches = []
for line in line_data:
    label = line['class_name']
    color = pitch.palette.get(label, (255, 255, 255))  # fallback color if missing

    # Scale from segmentation coords (360x640) to actual image size
    pt1 = (
        int(line['pt1'][0] * (width - 1) / 640.),
        int(line['pt1'][1] * (height - 1) / 360.)
    )
    pt2 = (
        int(line['pt2'][0] * (width - 1) / 640.),
        int(line['pt2'][1] * (height - 1) / 360.)
    )

    # Draw the line
    cv2.line(extremities_image, pt1, pt2, color, 2)

    # Homogeneous image line through the two endpoints (cross product)
    img_line = np.cross([pt1[0], pt1[1], 1], [pt2[0], pt2[1], 1])
    if not np.any(np.isnan(img_line)) and not np.any(np.isinf(img_line)):
        pitch_line = pitch.get_2d_homogeneous_line(label)
        if pitch_line is not None:
            line_matches.append((pitch_line, img_line))

# Step 6: Convert to PIL image for display
extremities_pil = Image.fromarray(extremities_image)
extremities_pil
<ipython-input-246-fcd3c44d6ee4>:32: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  y1 = int(((min_x - px) * vy / vx) + py)
<ipython-input-246-fcd3c44d6ee4>:33: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  y2 = int(((max_x - px) * vy / vx) + py)
Out[246]:
No description has been provided for this image

Compute homography from 3d pitch to 2D image.

In [247]:
def estimate_homography_from_line_correspondences(lines, T1=np.eye(3), T2=np.eye(3)):
    """
    Estimate the homography that best maps a set of source lines onto a set
    of target lines (direct linear transform on line correspondences).

    :param lines: list of (source_line, target_line) pairs, each a 3-vector
        in homogeneous line coordinates.
    :param T1: similarity transform normalizing the source reference system
    :param T2: similarity transform normalizing the target reference system
    :return: (success flag, 3x3 homography); on failure the identity matrix
        is returned as the homography.
    """
    homography = np.eye(3)
    A = np.zeros((len(lines) * 2, 9))

    # Lines transform with the inverse-transpose of the point transform.
    T1_line = np.transpose(np.linalg.inv(T1))
    T2_line = np.transpose(np.linalg.inv(T2))

    for i, (src, dst) in enumerate(lines):
        u, v, w = T1_line @ src
        x, y, z = T2_line @ dst

        # Each correspondence contributes two rows of the DLT system A h = 0.
        A[2 * i] = [0, x * w, -x * v,
                    0, y * w, -y * v,
                    0, z * w, -z * v]
        A[2 * i + 1] = [x * w, 0, -x * u,
                        y * w, 0, -y * u,
                        z * w, 0, -z * u]

    try:
        _, s, vh = np.linalg.svd(A)
    except np.linalg.LinAlgError:
        return False, homography

    # Take the right-singular vector paired with the smallest strictly
    # positive singular value: the (approximate) null vector of A.
    v_sol = np.eye(3)
    found_positive = False
    for i in range(s.shape[0] - 1, -2, -1):
        v_sol = np.reshape(vh[i], (3, 3))
        if s[i] > 0:
            found_positive = True
            break

    if not found_positive:
        return False, homography

    # Denormalize and fix the scale so that H[2, 2] == 1.
    homography = np.linalg.inv(T2) @ v_sol @ T1
    homography /= homography[2, 2]

    return True, homography

success, homography1 = estimate_homography_from_line_correspondences(line_matches)
homography1
Out[247]:
array([[ 4.38470704e+01,  1.99929827e+01, -1.36688802e+03],
       [-3.24970548e+00,  4.96532926e+00,  4.63188794e+02],
       [ 8.77966450e-03, -1.29054718e-02,  1.00000000e+00]])

Again, remove the "src." prefix from the imports in baseline_cameras.py and save, so that the import below works.

Fit the regulatory pitch into the image using the homography.

In [248]:
from baseline_cameras import draw_pitch_homography

cv_image = draw_pitch_homography(rgbimage, homography1)
homography_pil = Image.fromarray(cv_image)
homography_pil
Out[248]:
No description has been provided for this image

Compute intrinsic and extrinsic parameters from homography¶

Compute K

In [270]:
def estimate_calibration_matrix_from_plane_homography(cam, homography):
        """
        Initialize the calibration matrix from the homography between the world plane of the
        pitch and the image, following the extraction of the calibration matrix from a plane
        homography (Algorithm 8.2 of Multiple View Geometry in computer vision, p225).

        NOTE(review): the original baseline keeps the principal point in the middle of the
        image because this extraction is sensitive to noise; this version instead keeps the
        principal point extracted from the homography (see the assignment near the end) --
        confirm this is intentional.

        :param cam: Camera object whose intrinsics are updated in place on success
        :param homography: homography between the world plane of the pitch and the image
        :return: (success flag, 3x3 calibration matrix K)
        """
        H = np.reshape(homography, (9,))
        # Five linear constraints on the image of the absolute conic W,
        # stored as the 6-vector w = (W00, W01, W11, W02, W12, W22):
        A = np.zeros((5, 6))
        # zero skew: W01 = 0
        A[0, 1] = 1.
        # unit aspect ratio: W00 = W11
        A[1, 0] = 1.
        A[1, 2] = -1.
        # tie the ratio W12/W02 to cy/cx of the current principal point
        A[2, 3] = cam.principal_point[1] / cam.principal_point[0]
        A[2, 4] = -1.0
        # h0^T W h1 = 0: orthogonality of the first two homography columns
        A[3, 0] = H[0] * H[1]
        A[3, 1] = H[0] * H[4] + H[1] * H[3]
        A[3, 2] = H[3] * H[4]
        A[3, 3] = H[0] * H[7] + H[1] * H[6]
        A[3, 4] = H[3] * H[7] + H[4] * H[6]
        A[3, 5] = H[6] * H[7]
        # h0^T W h0 = h1^T W h1: equal norm of the first two columns
        A[4, 0] = H[0] * H[0] - H[1] * H[1]
        A[4, 1] = 2 * H[0] * H[3] - 2 * H[1] * H[4]
        A[4, 2] = H[3] * H[3] - H[4] * H[4]
        A[4, 3] = 2 * H[0] * H[6] - 2 * H[1] * H[7]
        A[4, 4] = 2 * H[3] * H[6] - 2 * H[4] * H[7]
        A[4, 5] = H[6] * H[6] - H[7] * H[7]

        # The null vector of A (last right-singular vector) is the conic.
        u, s, vh = np.linalg.svd(A)
        w = vh[-1]
        W = np.zeros((3, 3))
        # Rebuild the symmetric 3x3 conic matrix, normalized so W22 = 1.
        W[0, 0] = w[0] / w[5]
        W[0, 1] = w[1] / w[5]
        W[0, 2] = w[3] / w[5]
        W[1, 0] = w[1] / w[5]
        W[1, 1] = w[2] / w[5]
        W[1, 2] = w[4] / w[5]
        W[2, 0] = w[3] / w[5]
        W[2, 1] = w[4] / w[5]
        W[2, 2] = w[5] / w[5]

        # W = (K K^T)^{-1}; Cholesky fails iff W is not positive definite,
        # in which case no valid calibration can be extracted.
        try:
            Ktinv = np.linalg.cholesky(W)
        except np.linalg.LinAlgError:
            K = np.eye(3)
            return False, K

        K = np.linalg.inv(np.transpose(Ktinv))
        K /= K[2, 2]

        cam.xfocal_length = K[0, 0]
        cam.yfocal_length = K[1, 1]
        # the principal point estimated by this method is very noisy, better keep it in the center of the image
        #cam.principal_point = (cam.image_width / 2, cam.image_height / 2)
        cam.principal_point = (K[0,2], K[1,2])
        cam.calibration = np.array([
            [cam.xfocal_length, 0, cam.principal_point[0]],
            [0, cam.yfocal_length, cam.principal_point[1]],
            [0, 0, 1]
        ], dtype='float')
        return True, K

Compute R and t

In [271]:
def from_homography(cam, homography):
        """
        Initialize the essential camera parameters (calibration, rotation,
        position) from the homography between the world plane of the pitch and
        the image. The calibration matrix is extracted first (Algorithm 8.2 of
        Multiple View Geometry in computer vision, p225); rough rotation and
        position estimates are then recovered from the same homography
        (Example 8.1, p196).

        :param cam: Camera object updated in place
        :param homography: homography mapping the flat pitch model to the image
        :return: True on success, False if the calibration extraction failed
        """
        success, _ = estimate_calibration_matrix_from_plane_homography(cam, homography)
        if not success:
            return False

        # Remove the intrinsics: hprim ~ [r0 | r1 | t] up to column scales.
        hprim = np.linalg.inv(cam.calibration) @ homography
        scale0 = 1 / np.linalg.norm(hprim[:, 0])
        scale1 = 1 / np.linalg.norm(hprim[:, 1])
        scale_t = np.sqrt(scale0 * scale1)

        col0 = hprim[:, 0] * scale0
        col1 = hprim[:, 1] * scale1
        col2 = np.cross(col0, col1)

        # Project the candidate onto the closest true rotation matrix via SVD,
        # flipping a column if needed to keep a right-handed frame (det > 0).
        rot = np.column_stack((col0, col1, col2))
        u, s, vh = np.linalg.svd(rot)
        rot = u @ vh
        if np.linalg.det(rot) < 0:
            u[:, 2] *= -1
            rot = u @ vh

        cam.rotation = rot
        translation = hprim[:, 2] * scale_t
        cam.position = -np.transpose(rot) @ translation
        return True
In [272]:
# Recover full camera parameters for view 1 (960x540) from the
# pitch-to-image homography estimated above.
cam1 = Camera(960, 540)
success = from_homography(cam1, homography1)
cam1.to_json_parameters()
Out[272]:
{'pan_degrees': np.float64(34.22767288965573),
 'tilt_degrees': np.float64(77.23521689931756),
 'roll_degrees': np.float64(-0.12789481524650537),
 'position_meters': [-0.9886537107157112,
  72.04571258189529,
  -17.40204755919516],
 'x_focal_length': np.float64(2967.8864776321343),
 'y_focal_length': np.float64(2967.8864776320574),
 'principal_point': [np.float64(519.5423645411495),
  np.float64(292.24258005438145)],
 'radial_distortion': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'tangential_distortion': [0.0, 0.0],
 'thin_prism_distortion': [0.0, 0.0, 0.0, 0.0]}

Using the estimated camera position and parameters, view the 3D space from there.

In [273]:
black_img = np.zeros((540, 960, 3), dtype=np.uint8)
cam1.draw_colorful_pitch(black_img, SoccerPitch.palette)
rgb_model = cv2.cvtColor(black_img, cv2.COLOR_BGR2RGB)
pitch_model = Image.fromarray(rgb_model)
pitch_model
Out[273]:
No description has been provided for this image
In [274]:
image = cv2.imread("../images/img1.PNG")
image = cv2.resize(image, (960, 540))
rgbimage = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

Construct the projection matrix from the computed parameters.

In [275]:
def pan_tilt_roll_to_orientation(pan, tilt, roll):
    """
    Convert pan/tilt/roll Euler angles (radians) to an orientation matrix.

    :param pan: rotation angle about the z axis
    :param tilt: rotation angle about the x axis
    :param roll: rotation angle about the z axis, applied last in the chain
    :return: 3x3 orientation matrix, composed as Rpan @ Rtilt @ Rroll
    """
    cp, sp = np.cos(pan), np.sin(pan)
    ct, st = np.cos(tilt), np.sin(tilt)
    cr, sr = np.cos(roll), np.sin(roll)

    r_pan = np.array([
        [cp, -sp, 0],
        [sp, cp, 0],
        [0, 0, 1]])
    r_tilt = np.array([
        [1, 0, 0],
        [0, ct, -st],
        [0, st, ct]])
    r_roll = np.array([
        [cr, -sr, 0],
        [sr, cr, 0],
        [0, 0, 1]])

    return r_pan @ r_tilt @ r_roll
In [276]:
import numpy as np
from scipy.spatial.transform import Rotation as R

def build_projection_matrix(params):
    """
    Build the 3x4 projection matrix P = K [R | t] from a camera parameter
    dict as produced by Camera.to_json_parameters().

    :param params: dict with 'x_focal_length', 'y_focal_length',
        'principal_point', pan/tilt/roll in degrees and 'position_meters'
    :return: 3x4 projection matrix
    """
    cx, cy = params['principal_point']
    intrinsics = np.array([
        [params['x_focal_length'], 0.0, cx],
        [0.0, params['y_focal_length'], cy],
        [0.0, 0.0, 1.0],
    ])

    pan = np.radians(params['pan_degrees'])
    tilt = np.radians(params['tilt_degrees'])
    roll = np.radians(params['roll_degrees'])

    # World-to-camera rotation is the transpose of the orientation matrix.
    rotation = pan_tilt_roll_to_orientation(pan, tilt, roll).T
    camera_center = np.array(params['position_meters']).reshape((3, 1))
    translation = -rotation @ camera_center

    return intrinsics @ np.hstack((rotation, translation))

P1 = build_projection_matrix(cam1.to_json_parameters())
print(P1)
[[ 2.73970366e+03  1.24922480e+03  1.08331270e+02 -8.54074871e+04]
 [-2.03051878e+02  3.10249478e+02  2.95909971e+03  2.89415011e+04]
 [ 5.48581210e-01 -8.06374702e-01  2.20949079e-01  6.24831632e+01]]

Project the regulatory pitch in the image using the projection matrix.

In [277]:
def draw_pitch_projection_matrix(image, projection_matrix):
    """
    Draw sampled soccer-pitch points on an image using a full 3x4 camera
    projection matrix (P = K[R|t]). Only points lying on the ground plane
    (world z = 0) are projected.

    :param image: the image to draw on (modified in place)
    :param projection_matrix: 3x4 camera projection matrix
    :return: the modified image
    """
    field = SoccerPitch()

    for polyline in field.sample_field_points().values():
        for point in polyline:
            if point[2] != 0.:
                continue  # keep only ground-plane points
            world_point = np.array([point[0], point[1], 0., 1.])
            projected = projection_matrix @ world_point
            if projected[2] == 0.:
                continue  # projects to infinity; nothing to draw
            x_img = projected[0] / projected[2]
            y_img = projected[1] / projected[2]
            # Only draw points landing inside the image bounds.
            if 0 <= x_img < image.shape[1] and 0 <= y_img < image.shape[0]:
                cv2.circle(image, (int(x_img), int(y_img)), 1, (0, 255, 255), 1)

    return image


cv_image = draw_pitch_projection_matrix(rgbimage, P1)
homography_pil = Image.fromarray(cv_image)
homography_pil
Out[277]:
No description has been provided for this image

IMAGE 2¶

Repeat the process for the second view.

In [281]:
from PIL import Image
import cv2

image = cv2.imread("../images/img2.PNG")
image = cv2.resize(image, (960, 540))
print(image.shape)
rgbimage = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(rgbimage)
pil_image
(540, 960, 3)
Out[281]:
No description has been provided for this image

Raw segmentation

Cleaned Segmentation

In [282]:
import cv2
import numpy as np
from PIL import Image

# Step 1: Get raw segmentation output (one class index per pixel, 360x640)
seg_mask = seg_net.analyse_image(image)
pitch = SoccerPitch()

# Step 2: Create empty mask for cleaned results
cleaned_mask = np.zeros_like(seg_mask, dtype=np.uint8)
line_data = []

# Step 3: Extract one straight segment per straight pitch-line class
for class_idx, class_name in enumerate(SoccerPitch.lines_classes):
    # Posts, crossbars and circles are not straight ground-plane lines,
    # so fitting a single straight line to them makes no sense.
    if "post" in class_name or "crossbar" in class_name or "Circle" in class_name:
        continue

    class_mask = (seg_mask == class_idx + 1).astype(np.uint8)
    color = SoccerPitch.palette[class_name]

    # Morphological opening removes small segmentation speckle.
    kernel = np.ones((5, 5), np.uint8)
    class_mask = cv2.morphologyEx(class_mask, cv2.MORPH_OPEN, kernel)

    contours, _ = cv2.findContours(class_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        # cv2.fitLine returns a (4, 1) array; flatten it to plain scalars so
        # the int(...) conversions below no longer rely on NumPy's deprecated
        # ndarray-to-scalar conversion (a DeprecationWarning since NumPy 1.25,
        # scheduled to become an error).
        vx, vy, px, py = cv2.fitLine(largest_contour, cv2.DIST_L2, 0, 0.01, 0.01).ravel()
        points = largest_contour[:, 0, :]

        # Parameterize along the dominant direction so we never divide by a
        # near-zero component of the unit direction vector.
        if abs(vx) > abs(vy):
            min_x, max_x = np.min(points[:, 0]), np.max(points[:, 0])
            y1 = int(((min_x - px) * vy / vx) + py)
            y2 = int(((max_x - px) * vy / vx) + py)
            pt1, pt2 = (min_x, y1), (max_x, y2)
        else:
            min_y, max_y = np.min(points[:, 1]), np.max(points[:, 1])
            x1 = int(((min_y - py) * vx / vy) + px)
            x2 = int(((max_y - py) * vx / vy) + px)
            pt1, pt2 = (x1, min_y), (x2, max_y)

        line_data.append({
            'class_name': class_name,
            'class_idx': class_idx + 1,
            'pt1': pt1,
            'pt2': pt2,
        })

# Step 4: Merge endpoints that are close to each other (likely shared corners)
threshold = 20
def close(p1, p2, t=threshold):
    """Return True when two 2D points are closer than t pixels."""
    return np.linalg.norm(np.array(p1) - np.array(p2)) < t

for i in range(len(line_data)):
    for j in range(i + 1, len(line_data)):
        for key_i, key_j in [('pt1', 'pt1'), ('pt1', 'pt2'), ('pt2', 'pt1'), ('pt2', 'pt2')]:
            p1 = line_data[i][key_i]
            p2 = line_data[j][key_j]
            if close(p1, p2):
                # Snap both endpoints to their midpoint.
                avg = tuple(np.mean([p1, p2], axis=0).astype(int))
                line_data[i][key_i] = avg
                line_data[j][key_j] = avg

# Step 5: Draw lines and store homogeneous line equations
extremities_image = image.copy()
height, width = image.shape[:2]

line_matches = []
for line in line_data:
    label = line['class_name']
    color = pitch.palette.get(label, (255, 255, 255))  # fallback color if missing

    # Scale from segmentation coords (360x640) to actual image size
    pt1 = (
        int(line['pt1'][0] * (width - 1) / 640.),
        int(line['pt1'][1] * (height - 1) / 360.)
    )
    pt2 = (
        int(line['pt2'][0] * (width - 1) / 640.),
        int(line['pt2'][1] * (height - 1) / 360.)
    )

    # Draw the line
    cv2.line(extremities_image, pt1, pt2, color, 2)

    # Homogeneous image line through the two endpoints (cross product)
    img_line = np.cross([pt1[0], pt1[1], 1], [pt2[0], pt2[1], 1])
    if not np.any(np.isnan(img_line)) and not np.any(np.isinf(img_line)):
        pitch_line = pitch.get_2d_homogeneous_line(label)
        if pitch_line is not None:
            line_matches.append((pitch_line, img_line))

# Step 6: Convert to PIL image for display
extremities_pil = Image.fromarray(extremities_image)
extremities_pil
<ipython-input-282-322fbf1974ab>:32: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  y1 = int(((min_x - px) * vy / vx) + py)
<ipython-input-282-322fbf1974ab>:33: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  y2 = int(((max_x - px) * vy / vx) + py)
<ipython-input-282-322fbf1974ab>:37: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  x1 = int(((min_y - py) * vx / vy) + px)
<ipython-input-282-322fbf1974ab>:38: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  x2 = int(((max_y - py) * vx / vy) + px)
Out[282]:
No description has been provided for this image

Get homography 3D pitch to 2D image¶

In [283]:
def estimate_homography_from_line_correspondences(lines, T1=np.eye(3), T2=np.eye(3)):
    """
    Estimate the homography that best maps a set of source lines onto a set
    of target lines (direct linear transform on line correspondences).

    NOTE(review): duplicate of the identical definition in an earlier cell;
    this re-definition silently shadows the previous one.

    :param lines: list of (source_line, target_line) pairs, each a 3-vector
        in homogeneous line coordinates.
    :param T1: similarity transform normalizing the source reference system
    :param T2: similarity transform normalizing the target reference system
    :return: (success flag, 3x3 homography); on failure the identity matrix
        is returned as the homography.
    """
    homography = np.eye(3)
    A = np.zeros((len(lines) * 2, 9))

    # Lines transform with the inverse-transpose of the point transform.
    T1_line = np.transpose(np.linalg.inv(T1))
    T2_line = np.transpose(np.linalg.inv(T2))

    for i, (src, dst) in enumerate(lines):
        u, v, w = T1_line @ src
        x, y, z = T2_line @ dst

        # Each correspondence contributes two rows of the DLT system A h = 0.
        A[2 * i] = [0, x * w, -x * v,
                    0, y * w, -y * v,
                    0, z * w, -z * v]
        A[2 * i + 1] = [x * w, 0, -x * u,
                        y * w, 0, -y * u,
                        z * w, 0, -z * u]

    try:
        _, s, vh = np.linalg.svd(A)
    except np.linalg.LinAlgError:
        return False, homography

    # Take the right-singular vector paired with the smallest strictly
    # positive singular value: the (approximate) null vector of A.
    v_sol = np.eye(3)
    found_positive = False
    for i in range(s.shape[0] - 1, -2, -1):
        v_sol = np.reshape(vh[i], (3, 3))
        if s[i] > 0:
            found_positive = True
            break

    if not found_positive:
        return False, homography

    # Denormalize and fix the scale so that H[2, 2] == 1.
    homography = np.linalg.inv(T2) @ v_sol @ T1
    homography /= homography[2, 2]

    return True, homography

success, homography2 = estimate_homography_from_line_correspondences(line_matches)
homography2
Out[283]:
array([[ 2.09634433e+01, -2.34375734e+00, -3.92933845e+02],
       [-3.19842703e-01,  4.99133643e+00,  2.14982157e+02],
       [ 2.68076216e-03, -1.02835054e-02,  1.00000000e+00]])
In [284]:
from baseline_cameras import draw_pitch_homography

cv_image = draw_pitch_homography(rgbimage, homography2)
homography_pil = Image.fromarray(cv_image)
homography_pil
Out[284]:
No description has been provided for this image

Get projection matrix (K, R, and t)¶

Compute K

In [285]:
def estimate_calibration_matrix_from_plane_homography(cam, homography):
        """
        Initialize the calibration matrix from the homography between the world plane of the
        pitch and the image, following the extraction of the calibration matrix from a plane
        homography (Algorithm 8.2 of Multiple View Geometry in computer vision, p225).

        NOTE(review): duplicate of the identical definition in an earlier cell (it silently
        shadows the previous one). Also, the original baseline keeps the principal point in
        the middle of the image because this extraction is sensitive to noise; this version
        instead keeps the principal point extracted from the homography (see the assignment
        near the end) -- confirm this is intentional.

        :param cam: Camera object whose intrinsics are updated in place on success
        :param homography: homography between the world plane of the pitch and the image
        :return: (success flag, 3x3 calibration matrix K)
        """
        H = np.reshape(homography, (9,))
        # Five linear constraints on the image of the absolute conic W,
        # stored as the 6-vector w = (W00, W01, W11, W02, W12, W22):
        A = np.zeros((5, 6))
        # zero skew: W01 = 0
        A[0, 1] = 1.
        # unit aspect ratio: W00 = W11
        A[1, 0] = 1.
        A[1, 2] = -1.
        # tie the ratio W12/W02 to cy/cx of the current principal point
        A[2, 3] = cam.principal_point[1] / cam.principal_point[0]
        A[2, 4] = -1.0
        # h0^T W h1 = 0: orthogonality of the first two homography columns
        A[3, 0] = H[0] * H[1]
        A[3, 1] = H[0] * H[4] + H[1] * H[3]
        A[3, 2] = H[3] * H[4]
        A[3, 3] = H[0] * H[7] + H[1] * H[6]
        A[3, 4] = H[3] * H[7] + H[4] * H[6]
        A[3, 5] = H[6] * H[7]
        # h0^T W h0 = h1^T W h1: equal norm of the first two columns
        A[4, 0] = H[0] * H[0] - H[1] * H[1]
        A[4, 1] = 2 * H[0] * H[3] - 2 * H[1] * H[4]
        A[4, 2] = H[3] * H[3] - H[4] * H[4]
        A[4, 3] = 2 * H[0] * H[6] - 2 * H[1] * H[7]
        A[4, 4] = 2 * H[3] * H[6] - 2 * H[4] * H[7]
        A[4, 5] = H[6] * H[6] - H[7] * H[7]

        # The null vector of A (last right-singular vector) is the conic.
        u, s, vh = np.linalg.svd(A)
        w = vh[-1]
        W = np.zeros((3, 3))
        # Rebuild the symmetric 3x3 conic matrix, normalized so W22 = 1.
        W[0, 0] = w[0] / w[5]
        W[0, 1] = w[1] / w[5]
        W[0, 2] = w[3] / w[5]
        W[1, 0] = w[1] / w[5]
        W[1, 1] = w[2] / w[5]
        W[1, 2] = w[4] / w[5]
        W[2, 0] = w[3] / w[5]
        W[2, 1] = w[4] / w[5]
        W[2, 2] = w[5] / w[5]

        # W = (K K^T)^{-1}; Cholesky fails iff W is not positive definite,
        # in which case no valid calibration can be extracted.
        try:
            Ktinv = np.linalg.cholesky(W)
        except np.linalg.LinAlgError:
            K = np.eye(3)
            return False, K

        K = np.linalg.inv(np.transpose(Ktinv))
        K /= K[2, 2]

        cam.xfocal_length = K[0, 0]
        cam.yfocal_length = K[1, 1]
        # the principal point estimated by this method is very noisy, better keep it in the center of the image
        #cam.principal_point = (cam.image_width / 2, cam.image_height / 2)
        cam.principal_point = (K[0,2], K[1,2])
        cam.calibration = np.array([
            [cam.xfocal_length, 0, cam.principal_point[0]],
            [0, cam.yfocal_length, cam.principal_point[1]],
            [0, 0, 1]
        ], dtype='float')
        return True, K

Compute R and t

In [286]:
def from_homography(cam, homography):
        """
        Initialize the essential camera parameters (calibration, rotation,
        position) from the homography between the world plane of the pitch and
        the image. The calibration matrix is extracted first (Algorithm 8.2 of
        Multiple View Geometry in computer vision, p225); rough rotation and
        position estimates are then recovered from the same homography
        (Example 8.1, p196).

        NOTE(review): duplicate of the identical definition in an earlier cell.

        :param cam: Camera object updated in place
        :param homography: homography mapping the flat pitch model to the image
        :return: True on success, False if the calibration extraction failed
        """
        success, _ = estimate_calibration_matrix_from_plane_homography(cam, homography)
        if not success:
            return False

        # Remove the intrinsics: hprim ~ [r0 | r1 | t] up to column scales.
        hprim = np.linalg.inv(cam.calibration) @ homography
        scale0 = 1 / np.linalg.norm(hprim[:, 0])
        scale1 = 1 / np.linalg.norm(hprim[:, 1])
        scale_t = np.sqrt(scale0 * scale1)

        col0 = hprim[:, 0] * scale0
        col1 = hprim[:, 1] * scale1
        col2 = np.cross(col0, col1)

        # Project the candidate onto the closest true rotation matrix via SVD,
        # flipping a column if needed to keep a right-handed frame (det > 0).
        rot = np.column_stack((col0, col1, col2))
        u, s, vh = np.linalg.svd(rot)
        rot = u @ vh
        if np.linalg.det(rot) < 0:
            u[:, 2] *= -1
            rot = u @ vh

        cam.rotation = rot
        translation = hprim[:, 2] * scale_t
        cam.position = -np.transpose(rot) @ translation
        return True
In [287]:
# Recover full camera parameters for view 2 (960x540) from the
# pitch-to-image homography estimated above.
cam2 = Camera(960, 540)
success = from_homography(cam2, homography2)
cam2.to_json_parameters()
Out[287]:
{'pan_degrees': np.float64(14.611006701963836),
 'tilt_degrees': np.float64(63.071827337893126),
 'roll_degrees': np.float64(-2.7604715319025135),
 'position_meters': [33.00504916348341, 88.53292218278158, -32.98412384872227],
 'x_focal_length': np.float64(1654.1849167065595),
 'y_focal_length': np.float64(1654.1849167065154),
 'principal_point': [np.float64(670.5500060842609),
  np.float64(377.1843784223777)],
 'radial_distortion': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'tangential_distortion': [0.0, 0.0],
 'thin_prism_distortion': [0.0, 0.0, 0.0, 0.0]}
In [288]:
black_img = np.zeros((540, 960, 3), dtype=np.uint8)
cam2.draw_colorful_pitch(black_img, SoccerPitch.palette)
rgb_model = cv2.cvtColor(black_img, cv2.COLOR_BGR2RGB)
pitch_model = Image.fromarray(rgb_model)
pitch_model
Out[288]:
No description has been provided for this image
In [289]:
def pan_tilt_roll_to_orientation(pan, tilt, roll):
    """
    Convert pan/tilt/roll Euler angles (radians) to an orientation matrix.

    NOTE(review): duplicate of the identical definition in an earlier cell.

    :param pan: rotation angle about the z axis
    :param tilt: rotation angle about the x axis
    :param roll: rotation angle about the z axis, applied last in the chain
    :return: 3x3 orientation matrix, composed as Rpan @ Rtilt @ Rroll
    """
    cp, sp = np.cos(pan), np.sin(pan)
    ct, st = np.cos(tilt), np.sin(tilt)
    cr, sr = np.cos(roll), np.sin(roll)

    r_pan = np.array([
        [cp, -sp, 0],
        [sp, cp, 0],
        [0, 0, 1]])
    r_tilt = np.array([
        [1, 0, 0],
        [0, ct, -st],
        [0, st, ct]])
    r_roll = np.array([
        [cr, -sr, 0],
        [sr, cr, 0],
        [0, 0, 1]])

    return r_pan @ r_tilt @ r_roll
In [290]:
def build_projection_matrix(params):
    """
    Build the 3x4 projection matrix P = K [R | t] from a camera parameter
    dict as produced by Camera.to_json_parameters().

    NOTE(review): duplicate of the identical definition in an earlier cell.

    :param params: dict with 'x_focal_length', 'y_focal_length',
        'principal_point', pan/tilt/roll in degrees and 'position_meters'
    :return: 3x4 projection matrix
    """
    cx, cy = params['principal_point']
    intrinsics = np.array([
        [params['x_focal_length'], 0.0, cx],
        [0.0, params['y_focal_length'], cy],
        [0.0, 0.0, 1.0],
    ])

    pan = np.radians(params['pan_degrees'])
    tilt = np.radians(params['tilt_degrees'])
    roll = np.radians(params['roll_degrees'])

    # World-to-camera rotation is the transpose of the orientation matrix.
    rotation = pan_tilt_roll_to_orientation(pan, tilt, roll).T
    camera_center = np.array(params['position_meters']).reshape((3, 1))
    translation = -rotation @ camera_center

    return intrinsics @ np.hstack((rotation, translation))

P2 = build_projection_matrix(cam2.to_json_parameters())
print(P2)
[[ 1.75874313e+03 -1.96631206e+02  2.32645290e+02 -3.29654671e+04]
 [-2.68334332e+01  4.18751754e+02  1.64393514e+03  1.80360824e+04]
 [ 2.24904467e-01 -8.62742073e-01  4.52873157e-01  8.38957181e+01]]
In [291]:
image = cv2.imread("../images/img2.PNG")
image = cv2.resize(image, (960, 540))
rgbimage = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
In [292]:
def draw_pitch_projection_matrix(image, projection_matrix):
    """
    Draw sampled soccer-pitch points onto an image using a full 3x4
    camera projection matrix (P = K [R | t]).

    Only points lying on the ground plane (z = 0 in world coordinates)
    are projected.

    :param image: the image to draw on (modified in place)
    :param projection_matrix: 3x4 camera projection matrix
    :return: the same image with the projected pitch points drawn
    """
    pitch = SoccerPitch()
    for polyline in pitch.sample_field_points().values():
        for pt in polyline:
            if pt[2] != 0.:
                continue  # skip anything not on the ground plane
            # Homogeneous 3D point with z = 0.
            homog = projection_matrix @ np.array([pt[0], pt[1], 0., 1.])
            if homog[2] == 0.:
                continue  # point at infinity in the image
            u = homog[0] / homog[2]
            v = homog[1] / homog[2]
            # Only draw points that land inside the image bounds.
            if 0 <= u < image.shape[1] and 0 <= v < image.shape[0]:
                cv2.circle(image, (int(u), int(v)), 1, (0, 255, 255), 1)

    return image


# Overlay the projected pitch model on img2 using the P2 projection matrix.
cv_image = draw_pitch_projection_matrix(rgbimage, P2)
homography_pil = Image.fromarray(cv_image)
homography_pil
Out[292]:
No description has been provided for this image
In [293]:
# Load both camera views at the common 960x540 working resolution.
img1 = cv2.imread("../images/img1.PNG")
img1 = cv2.resize(img1, (960, 540))
img2 = cv2.imread("../images/img2.PNG")
img2 = cv2.resize(img2, (960, 540))

Pose estimation model¶

In [ ]:
from torchvision import models
import torch

# pose detection model
model = models.detection.keypointrcnn_resnet50_fpn(pretrained=True)
model.eval()
In [295]:
def get_keypoints(img):
    """
    Run the Keypoint R-CNN model (global `model`) on a BGR image and return
    the detected keypoints for every person.

    :param img: BGR image (numpy array, as loaded by cv2)
    :return: list with one keypoint array per detected person
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # HWC uint8 -> normalized NCHW float tensor in [0, 1].
    batch = torch.tensor(rgb).permute(2, 0, 1).unsqueeze(0).float() / 255.0

    with torch.no_grad():
        detections = model(batch)

    # One keypoint array per detected person.
    return [kp.cpu().numpy() for kp in detections[0]['keypoints']]

# Get keypoints for both images
keypoints1 = get_keypoints(img1)
keypoints2 = get_keypoints(img2)
In [296]:
def visualize_keypoints(img, keypoints_list, skeleton, player_numbers=None):
    """
    Overlay detected keypoints and skeleton lines for every person.

    :param img: BGR image to annotate (a copy is made; input is untouched)
    :param keypoints_list: list of per-person keypoint arrays
    :param skeleton: list of (joint_a, joint_b) index pairs to connect
    :param player_numbers: unused; kept for interface compatibility
    :return: annotated copy of the image
    """
    canvas = img.copy()

    for idx, person in enumerate(keypoints_list):
        # Dot on every valid (non-NaN) keypoint.
        for kp in person:
            if not np.any(np.isnan(kp)):
                cv2.circle(canvas, (int(kp[0]), int(kp[1])), 5, (0, 255, 0), -1)

        # Limb segments between pairs of valid joints.
        for a, b in skeleton:
            if not np.any(np.isnan(person[a])) and not np.any(np.isnan(person[b])):
                pa, pb = person[a], person[b]
                cv2.line(canvas, (int(pa[0]), int(pa[1])),
                         (int(pb[0]), int(pb[1])), (255, 0, 0), 2)

        # Detection index above the head (keypoint 0 is assumed to be the nose).
        nose = person[0]
        if not np.any(np.isnan(nose)):
            cv2.putText(canvas, str(idx), (int(nose[0]), int(nose[1]) - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2, cv2.LINE_AA)

    return canvas


# COCO-style 17-keypoint skeleton connectivity (joint index pairs).
skeleton = [
    (5, 7), (7, 9),   # Right arm
    (6, 8), (8, 10),  # Left arm
    (5, 11), (6, 12), # Torso sides
    (11, 13), (13, 15), # Right leg
    (12, 14), (14, 16)  # Left leg
]

# Annotate every detection in each image.
img1_with_keypoints = visualize_keypoints(img1, keypoints1, skeleton)
img2_with_keypoints = visualize_keypoints(img2, keypoints2, skeleton)

plt.figure(figsize=(15, 15))

# Plot for img1
plt.subplot(131)
plt.imshow(cv2.cvtColor(img1_with_keypoints, cv2.COLOR_BGR2RGB))  # BGR -> RGB for matplotlib
plt.title('Image 1 with Keypoints')
plt.axis('off')

# Plot for img2
plt.subplot(132)
plt.imshow(cv2.cvtColor(img2_with_keypoints, cv2.COLOR_BGR2RGB))
plt.title('Image 2 with Keypoints')
plt.axis('off')

plt.tight_layout()
plt.show()
No description has been provided for this image

Manually match players across the two views. During a real game, players would be tracked from the start, so correspondences between views would be found automatically.

In [297]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def visualize_keypoints(img, keypoints_list, skeleton, players):
    """
    Overlay keypoints and skeletons for a chosen subset of detections, and
    collect the pixel coordinates of the drawn keypoints.

    NOTE(review): this redefines the earlier visualize_keypoints with a
    different 4th parameter (an index filter instead of labels).

    :param img: BGR image to annotate (a copy is made; input is untouched)
    :param keypoints_list: list of per-person keypoint arrays
    :param skeleton: list of (joint_a, joint_b) index pairs to connect
    :param players: collection of detection indices to draw
    :return: (annotated image copy, flat list of drawn (x, y) int tuples)
    """
    canvas = img.copy()
    drawn_points = []  # (x, y) tuples for every keypoint actually drawn

    for idx, person in enumerate(keypoints_list):
        if idx not in players:
            continue

        # Dot on every valid (non-NaN) keypoint; record its pixel position.
        for kp in person:
            if not np.any(np.isnan(kp)):
                drawn_points.append((int(kp[0]), int(kp[1])))
                cv2.circle(canvas, (int(kp[0]), int(kp[1])), 5, (0, 255, 0), -1)

        # Limb segments between pairs of valid joints.
        for a, b in skeleton:
            if not np.any(np.isnan(person[a])) and not np.any(np.isnan(person[b])):
                pa, pb = person[a], person[b]
                cv2.line(canvas, (int(pa[0]), int(pa[1])),
                         (int(pb[0]), int(pb[1])), (255, 0, 0), 2)

        # Detection index above the head (keypoint 0 is assumed to be the nose).
        nose = person[0]
        if not np.any(np.isnan(nose)):
            cv2.putText(canvas, str(idx), (int(nose[0]), int(nose[1]) - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2, cv2.LINE_AA)

    return canvas, drawn_points


# Skeleton connections (you can customize this depending on your dataset)
skeleton = [
    (5, 7), (7, 9),   # Right arm
    (6, 8), (8, 10),  # Left arm
    (5, 11), (6, 12), # Torso sides
    (11, 13), (13, 15), # Right leg
    (12, 14), (14, 16)  # Left leg
]

# List of players (indices referring to players in the keypoints list)
players1 = [3]
players2 = [4]

# Visualizing keypoints on both images
img1_with_keypoints, updated_keypoints1 = visualize_keypoints(img1, keypoints1, skeleton, players1)
img2_with_keypoints, updated_keypoints2 = visualize_keypoints(img2, keypoints2, skeleton, players2)

# Plot the images
plt.figure(figsize=(15, 15))

# Plot for img1
plt.subplot(131)
plt.imshow(cv2.cvtColor(img1_with_keypoints, cv2.COLOR_BGR2RGB))
plt.title('Image 1 with Keypoints')
plt.axis('off')

# Plot for img2
plt.subplot(132)
plt.imshow(cv2.cvtColor(img2_with_keypoints, cv2.COLOR_BGR2RGB))
plt.title('Image 2 with Keypoints')
plt.axis('off')

plt.tight_layout()
plt.show()
No description has been provided for this image
In [298]:
print(updated_keypoints1)
print(updated_keypoints2)
[(278, 179), (277, 177), (276, 178), (277, 179), (273, 179), (277, 185), (272, 187), (278, 194), (272, 195), (281, 202), (274, 199), (278, 202), (275, 205), (284, 216), (283, 217), (278, 226), (280, 226)]
[(416, 163), (410, 143), (409, 145), (412, 144), (408, 144), (411, 144), (407, 144), (413, 146), (406, 148), (414, 157), (408, 145), (411, 149), (407, 151), (412, 152), (407, 152), (414, 160), (410, 157)]
In [299]:
# NOTE(review): P1 is the projection matrix for img1, built in an earlier cell
# (not visible here); P2 was built above from cam2.
points_3d = []

# triangulate points of individual players
for (x1, y1), (x2, y2) in zip(updated_keypoints1, updated_keypoints2):
    p1 = np.array([x1, y1], dtype=np.float64)
    p2 = np.array([x2, y2], dtype=np.float64)
    point_4d = cv2.triangulatePoints(P1, P2, p1, p2)  # homogeneous 4-vector
    point_3d = (point_4d[:3] / point_4d[3]).flatten()  # dehomogenize -> shape (3,)
    points_3d.append(point_3d)

points_3d = np.array(points_3d)  # shape: (N, 3), N = 17 joints per matched player
print("Triangulated 3D Points:", len(points_3d))
print(points_3d)
Triangulated 3D Points: 17
[[40.71186921 -1.47496143 -1.41703563]
 [40.63317067 -1.60838102 -1.71048556]
 [40.5501163  -1.50660639 -1.68840464]
 [40.75551921 -1.82065854 -1.59872976]
 [40.5190466  -1.63958625 -1.65552946]
 [40.73426171 -1.82131304 -1.46384894]
 [40.51406735 -1.7373046  -1.45482744]
 [40.88681964 -2.06150569 -1.16932989]
 [40.44412157 -1.62330903 -1.24827978]
 [40.82346734 -1.71407627 -0.91389342]
 [40.63648159 -1.89813425 -1.12459233]
 [40.76950153 -1.87673459 -0.99393845]
 [40.51836077 -1.61111204 -0.97596221]
 [40.8627687  -1.74940371 -0.6506709 ]
 [40.5607021  -1.29250218 -0.7514296 ]
 [40.95034868 -2.21957847 -0.20494239]
 [40.73948625 -1.77299497 -0.36042509]]
In [300]:
def draw_colorful_pitch(image, palette, cam, points_3d, draw_skeleton=True):
    """
    Draws all pitch lines and player skeletons on the image.

    :param image: image to draw on (modified in place)
    :param palette: dict mapping pitch element names to colors (channel order
        is reversed before drawing with cv2)
    :param cam: Camera object used to project 3D points; project_point is
        assumed to return homogeneous [x, y, w] coordinates, since the loop
        below divides by the third component -- TODO confirm against camera.py
    :param points_3d: flat list of 3D points for all players (multiple of 17)
    :param draw_skeleton: whether to draw skeletons or just player points
    :return: the same image, with pitch and player skeletons drawn
    """
    field = SoccerPitch()
    polylines = field.sample_field_points()

    # Draw pitch lines
    for key, line in polylines.items():
        if key not in palette:
            print(f"Can't draw {key}")
            continue
        prev_point = cam.project_point(line[0])
        # Bug fix: dehomogenize the first point of each polyline too.
        # Previously the first segment started at an un-normalized homogeneous
        # coordinate. (No-op if project_point already returns w == 1.)
        if prev_point[2] != 0.:
            prev_point = prev_point / prev_point[2]
        for point in line[1:]:
            projected = cam.project_point(point)
            if projected[2] == 0.:
                continue  # point at infinity
            projected /= projected[2]
            if 0 < projected[0] < cam.image_width and 0 < projected[1] < cam.image_height:
                cv2.line(image, (int(prev_point[0]), int(prev_point[1])),
                         (int(projected[0]), int(projected[1])),
                         palette[key][::-1], 1)
            prev_point = projected

    # COCO-style 17-keypoint skeleton connectivity.
    skeleton = [
        (5, 7), (7, 9),       # Right arm
        (6, 8), (8, 10),      # Left arm
        (5, 11), (6, 12),     # Torso sides
        (11, 13), (13, 15),   # Right leg
        (12, 14), (14, 16)    # Left leg
    ]

    # Project and draw each player's joints (and optionally limb segments).
    num_players = len(points_3d) // 17
    for i in range(num_players):
        joints = points_3d[i * 17: (i + 1) * 17]
        projected_joints = []
        for point in joints:
            projected = cam.project_point(point)
            if projected[2] != 0:
                projected /= projected[2]
                projected_joints.append((int(projected[0]), int(projected[1])))
            else:
                projected_joints.append(None)  # unprojectable joint

        if draw_skeleton:
            for a, b in skeleton:
                if projected_joints[a] is not None and projected_joints[b] is not None:
                    cv2.line(image, projected_joints[a], projected_joints[b], (0, 255, 255), 2)  # Yellow lines

        for pt in projected_joints:
            if pt is not None:
                cv2.circle(image, pt, radius=2, color=(0, 0, 255), thickness=-1)  # red joints

    return image


# Canvas matching the camera resolution (height was 548 -- a typo for 540;
# every other canvas in this notebook is 540x960).
black_img = np.zeros((540, 960, 3), dtype=np.uint8)
draw_colorful_pitch(black_img, SoccerPitch.palette, cam2, points_3d)

rgb_model = cv2.cvtColor(black_img, cv2.COLOR_BGR2RGB)  # BGR -> RGB for PIL display
pitch_model = Image.fromarray(rgb_model)
pitch_model
Out[300]:
No description has been provided for this image
In [301]:
def draw_colorful_pitch(image, palette, cam, points_3d, draw_skeleton=True):
    """
    Draws all pitch lines and player skeletons on the image.

    NOTE(review): this cell re-defines draw_colorful_pitch identically to the
    previous cell -- the duplicate could be removed.

    :param image: image to draw on (modified in place)
    :param palette: dict mapping pitch element names to colors (channel order
        is reversed before drawing with cv2)
    :param cam: Camera object used to project 3D points; project_point is
        assumed to return homogeneous [x, y, w] coordinates, since the loop
        below divides by the third component -- TODO confirm against camera.py
    :param points_3d: flat list of 3D points for all players (multiple of 17)
    :param draw_skeleton: whether to draw skeletons or just player points
    :return: the same image, with pitch and player skeletons drawn
    """
    field = SoccerPitch()
    polylines = field.sample_field_points()

    # Draw pitch lines
    for key, line in polylines.items():
        if key not in palette:
            print(f"Can't draw {key}")
            continue
        prev_point = cam.project_point(line[0])
        # Bug fix: dehomogenize the first point of each polyline too.
        # Previously the first segment started at an un-normalized homogeneous
        # coordinate. (No-op if project_point already returns w == 1.)
        if prev_point[2] != 0.:
            prev_point = prev_point / prev_point[2]
        for point in line[1:]:
            projected = cam.project_point(point)
            if projected[2] == 0.:
                continue  # point at infinity
            projected /= projected[2]
            if 0 < projected[0] < cam.image_width and 0 < projected[1] < cam.image_height:
                cv2.line(image, (int(prev_point[0]), int(prev_point[1])),
                         (int(projected[0]), int(projected[1])),
                         palette[key][::-1], 1)
            prev_point = projected

    # COCO-style 17-keypoint skeleton connectivity.
    skeleton = [
        (5, 7), (7, 9),       # Right arm
        (6, 8), (8, 10),      # Left arm
        (5, 11), (6, 12),     # Torso sides
        (11, 13), (13, 15),   # Right leg
        (12, 14), (14, 16)    # Left leg
    ]

    # Project and draw each player's joints (and optionally limb segments).
    num_players = len(points_3d) // 17
    for i in range(num_players):
        joints = points_3d[i * 17: (i + 1) * 17]
        projected_joints = []
        for point in joints:
            projected = cam.project_point(point)
            if projected[2] != 0:
                projected /= projected[2]
                projected_joints.append((int(projected[0]), int(projected[1])))
            else:
                projected_joints.append(None)  # unprojectable joint

        if draw_skeleton:
            for a, b in skeleton:
                if projected_joints[a] is not None and projected_joints[b] is not None:
                    cv2.line(image, projected_joints[a], projected_joints[b], (0, 255, 255), 2)  # Yellow lines

        for pt in projected_joints:
            if pt is not None:
                cv2.circle(image, pt, radius=2, color=(0, 0, 255), thickness=-1)  # red joints

    return image


# Render the triangulated player skeletons from a new virtual viewpoint.
camera = Camera(960, 540)  # image width, height
camera.xfocal_length = 600  # NOTE(review): attribute names assumed to match camera.py's Camera -- confirm
camera.yfocal_length = 600
camera.position = [30, 0, -60]  # virtual camera position in world coordinates (meters) -- axis convention per SoccerNet, confirm
black_img = np.zeros((540, 960, 3), dtype=np.uint8)
draw_colorful_pitch(black_img, SoccerPitch.palette, camera, points_3d)

rgb_model = cv2.cvtColor(black_img, cv2.COLOR_BGR2RGB)  # BGR -> RGB for PIL display
pitch_model = Image.fromarray(rgb_model)
pitch_model
Out[301]:
No description has been provided for this image
In [302]:
def draw_colorful_pitch(image, palette, cam, points_3d, draw_skeleton=True):
    """
    Draws all pitch lines and player skeletons on the image.

    NOTE(review): this is the third identical definition of
    draw_colorful_pitch in this notebook -- the duplicates could be removed.

    :param image: image to draw on (modified in place)
    :param palette: dict mapping pitch element names to colors (channel order
        is reversed before drawing with cv2)
    :param cam: Camera object used to project 3D points; project_point is
        assumed to return homogeneous [x, y, w] coordinates, since the loop
        below divides by the third component -- TODO confirm against camera.py
    :param points_3d: flat list of 3D points for all players (multiple of 17)
    :param draw_skeleton: whether to draw skeletons or just player points
    :return: the same image, with pitch and player skeletons drawn
    """
    field = SoccerPitch()
    polylines = field.sample_field_points()

    # Draw pitch lines
    for key, line in polylines.items():
        if key not in palette:
            print(f"Can't draw {key}")
            continue
        prev_point = cam.project_point(line[0])
        # Bug fix: dehomogenize the first point of each polyline too.
        # Previously the first segment started at an un-normalized homogeneous
        # coordinate. (No-op if project_point already returns w == 1.)
        if prev_point[2] != 0.:
            prev_point = prev_point / prev_point[2]
        for point in line[1:]:
            projected = cam.project_point(point)
            if projected[2] == 0.:
                continue  # point at infinity
            projected /= projected[2]
            if 0 < projected[0] < cam.image_width and 0 < projected[1] < cam.image_height:
                cv2.line(image, (int(prev_point[0]), int(prev_point[1])),
                         (int(projected[0]), int(projected[1])),
                         palette[key][::-1], 1)
            prev_point = projected

    # COCO-style 17-keypoint skeleton connectivity.
    skeleton = [
        (5, 7), (7, 9),       # Right arm
        (6, 8), (8, 10),      # Left arm
        (5, 11), (6, 12),     # Torso sides
        (11, 13), (13, 15),   # Right leg
        (12, 14), (14, 16)    # Left leg
    ]

    # Project and draw each player's joints (and optionally limb segments).
    num_players = len(points_3d) // 17
    for i in range(num_players):
        joints = points_3d[i * 17: (i + 1) * 17]
        projected_joints = []
        for point in joints:
            projected = cam.project_point(point)
            if projected[2] != 0:
                projected /= projected[2]
                projected_joints.append((int(projected[0]), int(projected[1])))
            else:
                projected_joints.append(None)  # unprojectable joint

        if draw_skeleton:
            for a, b in skeleton:
                if projected_joints[a] is not None and projected_joints[b] is not None:
                    cv2.line(image, projected_joints[a], projected_joints[b], (0, 255, 255), 2)  # Yellow lines

        for pt in projected_joints:
            if pt is not None:
                cv2.circle(image, pt, radius=2, color=(0, 0, 255), thickness=-1)  # red joints

    return image


# Render from a top-down virtual viewpoint above the pitch.
camera = Camera(960, 540)  # image width, height
camera.xfocal_length = 1500  # NOTE(review): attribute names assumed to match camera.py's Camera -- confirm
camera.yfocal_length = 1500
camera.position = [40, 56, -2]  # virtual camera position in world coordinates (meters)
# 90-degree tilt; the transpose converts the orientation to a world->camera rotation.
camera.rotation = pan_tilt_roll_to_orientation(0, np.radians(90), 0).T
black_img = np.zeros((540, 960, 3), dtype=np.uint8)
draw_colorful_pitch(black_img, SoccerPitch.palette, camera, points_3d)

rgb_model = cv2.cvtColor(black_img, cv2.COLOR_BGR2RGB)  # BGR -> RGB for PIL display
pitch_model = Image.fromarray(rgb_model)
pitch_model
Out[302]:
No description has been provided for this image